//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Scanf/printf implementation for use in *Sanitizer interceptors.
// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
// with a few common GNU extensions.
//
//===----------------------------------------------------------------------===//

#include <stdarg.h>

// Stores internal_atoll(p), narrowed to int, in |*out| and returns a pointer
// to the first character at or after |p| that is not an ASCII decimal digit.
// Callers in this file only invoke it when |*p| is already a digit.
static const char *parse_number(const char *p, int *out) {
  *out = internal_atoll(p);
  const char *cursor = p;
  for (; '0' <= *cursor && *cursor <= '9'; ++cursor) {
    // Nothing to do: the loop only skips over the digits.
  }
  return cursor;
}

// Tries to parse a positional argument index of the form "n$" at |p|.
// On success stores n in |*out| and returns a pointer just past the '$'.
// Otherwise leaves |*out| untouched and returns |p| unchanged, so that the
// digits can be re-parsed later as the field width.
static const char *maybe_parse_param_index(const char *p, int *out) {
  if (*p < '0' || *p > '9')
    return p;

  int index;
  const char *afterDigits = parse_number(p, &index);
  CHECK(afterDigits);
  if (*afterDigits != '$')
    return p;

  *out = index;
  return afterDigits + 1;
}

// Returns true if |c| is one of the characters of the NUL-terminated set |s|.
//
// The NUL character is never reported as a member. A strchr()-style search
// treats the set's own terminator as part of the string, so without this
// guard a '\0' read from the end of a format string could be accepted as a
// flag or length modifier, and the parsers in this file would then advance
// past the end of the format.
static bool char_is_one_of(char c, const char *s) {
  return c != '\0' && internal_strchr(s, c) != nullptr;
}

// Parses an optional length modifier at |p|: one of j, z, t, L, q, h, hh, l
// or ll. The modifier characters are stored in |ll| (ll[1] is written only
// for the doubled forms "hh" and "ll"); |ll| is left untouched when no
// modifier is present. Returns a pointer to the first character after the
// modifier, or |p| itself if there is none.
//
// The characters are matched literally, so the terminating NUL of the format
// string is never mistaken for a modifier and |p| never moves past it.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    ll[0] = *p++;
    break;
  case 'h':
  case 'l': {
    // "h" and "l" may be doubled ("hh", "ll").
    const char first = *p++;
    ll[0] = first;
    if (*p == first) {
      ll[1] = first;
      ++p;
    }
    break;
  }
  default:
    break;
  }
  return p;
}

// Tells whether |c| is an integer conversion specifier: one of d, i, o, u,
// x, X, n, plus b and B when SANITIZER_GLIBC is set.
static bool format_is_integer_conv(char c) {
#if SANITIZER_GLIBC
  return char_is_one_of(c, "bB") || char_is_one_of(c, "diouxXn");
#else
  return char_is_one_of(c, "diouxXn");
#endif
}

// Tells whether |c| is a floating-point conversion specifier
// (a, A, e, E, f, F, g or G).
static bool format_is_float_conv(char c) {
  const char *const floatConversions = "aAeEfFgG";
  return char_is_one_of(c, floatConversions);
}

// Returns the size in bytes of one output character for the string-like
// conversions (c, C, s, S, [), or 0 if the conversion / length modifier
// combination is not a valid string-like conversion.
static int format_get_char_size(char convSpecifier,
                                const char lengthModifier[2]) {
  // %C and %S are wide regardless of the length modifier.
  if (char_is_one_of(convSpecifier, "CS"))
    return sizeof(wchar_t);

  if (!char_is_one_of(convSpecifier, "cs["))
    return 0;

  // A single 'l' selects the wide variant; no modifier selects plain char.
  const bool singleL = lengthModifier[0] == 'l' && lengthModifier[1] == '\0';
  if (singleL)
    return sizeof(wchar_t);

  const bool noModifier = lengthModifier[0] == '\0';
  if (noModifier)
    return sizeof(char);

  // Any other modifier is invalid here.
  return 0;
}

// Special, non-positive results of the *_get_value_size() helpers in this
// file. A positive result from those helpers is a byte count; the values
// below mean that the size is unknown in advance or that the directive is
// invalid.
enum FormatStoreSize {
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Invalid conversion specifier.
  FSS_INVALID = 0
};

// Computes the in-memory size of the value that a directive with the given
// conversion specifier and length modifier refers to. Handles the integer,
// floating-point and %p conversions only. Returns a positive byte count, or
// FSS_INVALID for an unknown conversion or an unsupported length modifier.
// |promote_float| selects the printf rule (a plain floating-point argument
// has been promoted to double) over the scanf rule (it is a float).
static int format_get_value_size(char convSpecifier,
                                 const char lengthModifier[2],
                                 bool promote_float) {
  const char first = lengthModifier[0];
  const char second = lengthModifier[1];

  if (format_is_integer_conv(convSpecifier)) {
    if (first == 0)
      return sizeof(int);
    if (first == 'h')
      return second == 'h' ? sizeof(char) : sizeof(short);
    if (first == 'l')
      return second == 'l' ? sizeof(long long) : sizeof(long);
    if (first == 'q' || first == 'L')
      return sizeof(long long);
    if (first == 'j')
      return sizeof(INTMAX_T);
    if (first == 'z')
      return sizeof(SIZE_T);
    if (first == 't')
      return sizeof(PTRDIFF_T);
    return FSS_INVALID;
  }

  if (format_is_float_conv(convSpecifier)) {
    if (first == 0) {
      // Printf promotes floats to doubles but scanf does not.
      return promote_float ? sizeof(double) : sizeof(float);
    }
    if (first == 'L' || first == 'q')
      return sizeof(long double);
    if (first == 'l')
      return second == 'l' ? sizeof(long double) : sizeof(double);
    return FSS_INVALID;
  }

  // %p takes no length modifier.
  if (convSpecifier == 'p' && first == 0)
    return sizeof(void *);

  return FSS_INVALID;
}

// One parsed scanf conversion directive, as filled in by scanf_parse_next().
// The parser zeroes the whole struct and sets argIdx to -1 before parsing, so
// a field that does not occur in the directive keeps that initial value.
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // explicit maximum field width, or 0 if none was given
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the last character of the directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // e.g. {'l','l'} for "ll"; unused slots stay '\0'
  char convSpecifier; // conversion character, e.g. 'd', 's' or '['
  bool maybeGnuMalloc; // "%a" followed by s, S or [...]: may be GNU "%as"
};

// Parse scanf format string. If a valid directive is encountered, it is
// returned in dir. This function returns the pointer to the first
// unprocessed character, or 0 in case of error.
// In case of the end-of-string, a pointer to the closing \0 is returned and
// dir->convSpecifier is left as 0.
//
// A directive that is cut short by the end of the string (a lone "%", or
// e.g. "%5", "%m", "%l") is an error: the terminating \0 is never consumed
// as a conversion specifier, so the returned pointer never points past the
// end of the format string.
//
// When allowGnuMalloc is set, "%a" immediately followed by s, S or [...] is
// flagged in dir->maybeGnuMalloc because it may be the old GNU allocating
// conversion rather than POSIX %a followed by literal text.
static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
                                    ScanfDirective *dir) {
  internal_memset(dir, 0, sizeof(*dir));
  dir->argIdx = -1;

  while (*p) {
    if (*p != '%') {
      ++p;
      continue;
    }
    dir->begin = p;
    ++p;
    // %%
    if (*p == '%') {
      ++p;
      continue;
    }
    if (*p == '\0') {
      return nullptr;
    }
    // %n$
    p = maybe_parse_param_index(p, &dir->argIdx);
    CHECK(p);
    // *
    if (*p == '*') {
      dir->suppressed = true;
      ++p;
    }
    // Field width
    if (*p >= '0' && *p <= '9') {
      p = parse_number(p, &dir->fieldWidth);
      CHECK(p);
      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
        return nullptr;
    }
    // m
    if (*p == 'm') {
      dir->allocate = true;
      ++p;
    }
    // The format string ended before the length modifier.
    if (*p == '\0')
      return nullptr;
    // Length modifier.
    p = maybe_parse_length_modifier(p, dir->lengthModifier);
    // The format string ended before the conversion specifier. Consuming the
    // \0 below would move p past the end of the string.
    if (*p == '\0')
      return nullptr;
    // Conversion specifier.
    dir->convSpecifier = *p++;
    // Consume %[...] expression.
    if (dir->convSpecifier == '[') {
      if (*p == '^')
        ++p;
      // A ']' right after '[' or "[^" is a member of the set, not its end.
      if (*p == ']')
        ++p;
      while (*p && *p != ']')
        ++p;
      if (*p == 0)
        return nullptr; // unexpected end of string
                        // Consume the closing ']'.
      ++p;
    }
    // This is unfortunately ambiguous between old GNU extension
    // of %as, %aS and %a[...] and newer POSIX %a followed by
    // letters s, S or [.
    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
        !dir->lengthModifier[0]) {
      if (*p == 's' || *p == 'S') {
        dir->maybeGnuMalloc = true;
        ++p;
      } else if (*p == '[') {
        // Watch for %a[h-j%d], if % appears in the
        // [...] range, then we need to give up, we don't know
        // if scanf will parse it as POSIX %a [h-j %d ] or
        // GNU allocation of string with range dh-j plus %.
        const char *q = p + 1;
        if (*q == '^')
          ++q;
        if (*q == ']')
          ++q;
        while (*q && *q != ']' && *q != '%')
          ++q;
        if (*q == 0 || *q == '%')
          return nullptr;
        p = q + 1; // Consume the closing ']'.
        dir->maybeGnuMalloc = true;
      }
    }
    dir->end = p;
    break;
  }
  return p;
}

// Returns the number of bytes a scanf directive stores through its argument,
// or one of the FormatStoreSize values (FSS_STRLEN / FSS_WCSLEN when the
// size depends on the stored string, FSS_INVALID for a bad directive).
static int scanf_get_value_size(ScanfDirective *dir) {
  const bool stringLike = char_is_one_of(dir->convSpecifier, "cCsS[");

  // "%m...": the argument receives a pointer to the allocated buffer.
  if (dir->allocate) {
    if (stringLike)
      return sizeof(char *);
    return FSS_INVALID;
  }

  if (dir->maybeGnuMalloc) {
    const bool plainA = dir->convSpecifier == 'a' && !dir->lengthModifier[0];
    if (!plainA)
      return FSS_INVALID;
    // This is ambiguous, so check the smaller size of char * (if it is
    // a GNU extension of %as, %aS or %a[...]) and float (if it is
    // POSIX %a followed by s, S or [ letters).
    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
  }

  // Everything that is not string-like is a plain scalar store.
  if (!stringLike)
    return format_get_value_size(dir->convSpecifier, dir->lengthModifier,
                                 false);

  unsigned charSize =
      format_get_char_size(dir->convSpecifier, dir->lengthModifier);
  if (charSize == 0)
    return FSS_INVALID;

  // %s, %S and %[ append a terminator; %c and %C do not.
  bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");

  // With an explicit width the store size is known up front.
  if (dir->fieldWidth != 0)
    return (dir->fieldWidth + needsTerminator) * charSize;

  // No width: a single character, or a string of unknown length.
  if (!needsTerminator)
    return charSize;
  return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
}

// Common part of *scanf interceptors.
// Process format string and va_list, and report all store ranges.
// Stops when "consuming" n_inputs input items.
static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
                         const char *format, va_list aq) {
  CHECK_GT(n_inputs, 0);
  const char *p = format;

  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);

  while (*p) {
    ScanfDirective dir;
    p = scanf_parse_next(p, allowGnuMalloc, &dir);
    if (!p)
      break;
    if (dir.convSpecifier == 0) {
      // This can only happen at the end of the format string.
      CHECK_EQ(*p, 0);
      break;
    }
    // Here the directive is valid. Do what it says.
    if (dir.argIdx != -1) {
      // Unsupported.
      break;
    }
    if (dir.suppressed)
      continue;
    int size = scanf_get_value_size(&dir);
    if (size == FSS_INVALID) {
      Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
             SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
      break;
    }
    void *argp = va_arg(aq, void *);
    if (dir.convSpecifier != 'n')
      --n_inputs;
    if (n_inputs < 0)
      break;
    if (size == FSS_STRLEN) {
      size = internal_strlen((const char *)argp) + 1;
    } else if (size == FSS_WCSLEN) {
      // FIXME: actually use wcslen() to calculate it.
      size = 0;
    }
    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
    // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
    if (dir.allocate) {
      if (char *buf = *(char **)argp) {
        if (dir.convSpecifier == 'c')
          size = 1;
        else if (dir.convSpecifier == 'C')
          size = sizeof(wchar_t);
        else if (dir.convSpecifier == 'S')
          size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t);
        else  // 's' or '['
          size = internal_strlen(buf) + 1;
        COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
      }
    }
  }
}

#if SANITIZER_INTERCEPT_PRINTF

// One parsed printf conversion directive, as filled in by
// printf_parse_next(). The parser zeroes the whole struct and sets both
// index fields to -1 before parsing. printf_common() stops processing the
// format as soon as either index field is not -1.
struct PrintfDirective {
  int fieldWidth; // explicit numeric field width, or 0 if none was parsed
  int fieldPrecision; // explicit numeric precision, or 0 if none was parsed
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the conversion specifier
  bool starredWidth; // field width given as '*' (taken from an argument)
  bool starredPrecision; // precision given as '*' (taken from an argument)
  char lengthModifier[2]; // e.g. {'l','l'} for "ll"; unused slots stay '\0'
  char convSpecifier; // conversion character, e.g. 'd' or 's'
};

// If |p| starts with a decimal digit, parses the number into |*out| via
// parse_number() and returns the position after it; otherwise returns |p|
// and leaves |*out| untouched.
static const char *maybe_parse_number(const char *p, int *out) {
  const bool startsWithDigit = '0' <= *p && *p <= '9';
  return startsWithDigit ? parse_number(p, out) : p;
}

// Parses either a '*' or an optional decimal number at |p|. |*star| is set
// to whether a '*' was seen. A number, if present, is stored in |*out|;
// |*out| is left untouched for '*' or when there is no number. Returns the
// position after whatever was consumed.
static const char *maybe_parse_number_or_star(const char *p, int *out,
                                              bool *star) {
  const bool sawStar = (*p == '*');
  *star = sawStar;
  if (sawStar)
    return p + 1;
  return maybe_parse_number(p, out);
}

// Parse printf format string. Same contract as scanf_parse_next: if a valid
// directive is encountered, it is returned in dir and the function returns
// the pointer to the first unprocessed character; it returns 0 in case of
// error. At the end of the string a pointer to the closing \0 is returned
// and dir->convSpecifier is left as 0.
//
// A directive that is cut short by the end of the string (a lone "%", or
// e.g. "%-", "%5", "%.3", "%l") is an error: the terminating \0 is never
// consumed as a flag or as a conversion specifier, so the returned pointer
// never points past the end of the format string.
//
// The leading "%n$" positional index is stored in dir->argIdx and the index
// that follows a starred precision (".*m$") in dir->precisionIdx.
static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
  internal_memset(dir, 0, sizeof(*dir));
  dir->argIdx = -1;
  dir->precisionIdx = -1;

  while (*p) {
    if (*p != '%') {
      ++p;
      continue;
    }
    dir->begin = p;
    ++p;
    // %%
    if (*p == '%') {
      ++p;
      continue;
    }
    if (*p == '\0') {
      return nullptr;
    }
    // %n$
    p = maybe_parse_param_index(p, &dir->argIdx);
    CHECK(p);
    // Flags. Test for the end of the string explicitly rather than relying
    // on how char_is_one_of() treats '\0'.
    while (*p != '\0' && char_is_one_of(*p, "'-+ #0")) {
      ++p;
    }
    // Field width
    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
                                   &dir->starredWidth);
    if (!p)
      return nullptr;
    // Precision
    if (*p == '.') {
      ++p;
      // Actual precision is optional (surprise!)
      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
                                     &dir->starredPrecision);
      if (!p)
        return nullptr;
      // m$
      if (dir->starredPrecision) {
        p = maybe_parse_param_index(p, &dir->precisionIdx);
        CHECK(p);
      }
    }
    // The format string ended before the length modifier.
    if (*p == '\0')
      return nullptr;
    // Length modifier.
    p = maybe_parse_length_modifier(p, dir->lengthModifier);
    // The format string ended before the conversion specifier. Consuming the
    // \0 below would move p past the end of the string.
    if (*p == '\0')
      return nullptr;
    // Conversion specifier.
    dir->convSpecifier = *p++;
    dir->end = p;
    break;
  }
  return p;
}

// Returns the size in bytes of the argument a printf directive consumes, or
// one of the FormatStoreSize values: FSS_STRLEN / FSS_WCSLEN for narrow /
// wide string arguments, FSS_INVALID for a bad directive.
static int printf_get_value_size(PrintfDirective *dir) {
  // Non character/string conversions are plain scalars; floats are promoted.
  if (!char_is_one_of(dir->convSpecifier, "cCsS"))
    return format_get_value_size(dir->convSpecifier, dir->lengthModifier,
                                 true);

  unsigned charSize =
      format_get_char_size(dir->convSpecifier, dir->lengthModifier);
  if (charSize == 0)
    return FSS_INVALID;

  const bool isString = char_is_one_of(dir->convSpecifier, "sS");
  if (!isString)
    return charSize;
  return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
}

// Consumes (and discards) one non-pointer argument from the va_list that
// |aq| points to.
//
// For floating-point conversions (per format_is_float_conv) a |size| of 8
// reads a double and a |size| of 12 or 16 reads a long double. For every
// other conversion a |size| of 1, 2 or 4 reads a u32 and a |size| of 8 reads
// a u64.
//
// Any other |size| prints a warning via Report() and then executes a bare
// `return;` in the function that uses the macro, so the macro can only be
// used inside functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*aq, double);                                       \
        break;                                                     \
      case 12:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      case 16:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %zu\n", static_cast<uptr>(size));             \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*aq, u32);                                          \
        break;                                                     \
      case 8:                                                      \
        va_arg(*aq, u64);                                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %zu\n", static_cast<uptr>(size));             \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)

// Common part of *printf interceptors.
// Process format string and va_list, and report all load ranges.
//
// ctx:        interceptor context passed through to the range macros.
// format, aq: the format string and its arguments.
//
// The format string itself is reported as read. For each directive the
// matching argument is consumed from aq; string arguments are reported as
// read, the target of %n is reported as written, and scalar arguments are
// only skipped. Processing stops at the first parse error, positional
// ("n$") directive or invalid directive. SKIP_SCALAR_ARG may also return
// from this function when it meets an argument size it does not handle.
static void printf_common(void *ctx, const char *format, va_list aq) {
  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);

  const char *p = format;

  while (*p) {
    PrintfDirective dir;
    p = printf_parse_next(p, &dir);
    if (!p)
      break;
    if (dir.convSpecifier == 0) {
      // This can only happen at the end of the format string.
      CHECK_EQ(*p, 0);
      break;
    }
    // Here the directive is valid. Do what it says.
    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
      // Unsupported.
      break;
    }
    // A '*' width or precision consumes an int argument before the value.
    if (dir.starredWidth) {
      // Dynamic width
      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
    }
    if (dir.starredPrecision) {
      // Dynamic precision
      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
    }
    // %m does not consume an argument (presumably it formats the errno
    // message, as in glibc).
    if (dir.convSpecifier == 'm')
      continue;
    int size = printf_get_value_size(&dir);
    if (size == FSS_INVALID) {
      // Warn only for the first invalid directive seen in this process.
      static int ReportedOnce;
      if (!ReportedOnce++)
        Report(
            "%s: WARNING: unexpected format specifier in printf "
            "interceptor: %.*s (reported once per process)\n",
            SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
      break;
    }
    if (dir.convSpecifier == 'n') {
      // %n stores through its pointer argument, so report a write.
      void *argp = va_arg(aq, void *);
      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
      continue;
    } else if (size == FSS_STRLEN) {
      // Narrow string argument; a null pointer is skipped silently.
      if (void *argp = va_arg(aq, void *)) {
        uptr len;
        if (dir.starredPrecision) {
          // FIXME: properly support starred precision for strings.
          len = 0;
        } else if (dir.fieldPrecision > 0) {
          // Won't read more than "precision" symbols.
          len = internal_strnlen((const char *)argp, dir.fieldPrecision);
          // A terminator found within the precision is read as well.
          if (len < (uptr)dir.fieldPrecision)
            len++;
        } else {
          // Whole string will be accessed.
          // NOTE(review): an explicit precision of 0 ("%.0s" or "%.s") also
          // lands here, because fieldPrecision is 0 both when no precision
          // was given and when it was given as 0 -- confirm whether such
          // directives should report an empty range instead.
          len = internal_strlen((const char *)argp) + 1;
        }
        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
      }
    } else if (size == FSS_WCSLEN) {
      // Wide string argument; a null pointer is skipped silently.
      if (void *argp = va_arg(aq, void *)) {
        // FIXME: Properly support wide-character strings (via wcsrtombs).
        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
      }
    } else {
      // Skip non-pointer args
      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
    }
  }
}

#endif // SANITIZER_INTERCEPT_PRINTF
